/*
Do-file to create sibling order dataset from SOEP.
*/

clear all
set more off
set maxvar 20000

******
*1984*
******
***HH: records with astell==0 in the 1984 wave
* NOTE(review): astell==0 is presumably the household-head code - confirm against the codebook.
use "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Data/apbrutto.dta", clear
keep if astell==0
keep hhnr persnr ageburt asex
rename persnr pid_hh
rename asex sex_hh
rename ageburt age_hh

* age_hh holds ageburt at this point. The recodes below read it as a two-digit
* birth year and convert it to age in 1984:
*   1-83  becomes 84 - value
*   84-99 becomes 184 - value (the value is read as a year in the 1800s)
*   0     becomes 84
* Negative values cannot be a birth year; they are set to missing first, in line
* with the -1 handling applied to the later waves in this file. Without this a
* negative code would pass through unchanged and end up as a small positive age.
replace age_hh = . if age_hh<0
replace age_hh = 84-age_hh if inrange(age_hh, 1, 83)
replace age_hh = 184-age_hh if inrange(age_hh, 84, 99)
replace age_hh = 84 if age_hh==0

* age_hhsur keeps the age in the survey year (1984); age_hh is shifted to 2013 later on.
gen age_hhsur = age_hh

save "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/ahh.dta", replace



***Children (1984 wave)
use "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Data/apbrutto.dta", clear

***Keep all biological children
* Selection is on astell equal to 3, 4 or 12; the two commented-out lines are
* earlier, narrower variants kept for reference.
* NOTE(review): confirm against the codebook that these codes are the intended child categories.
*keep if astell==3
*keep if inrange(astell, 3, 4)
keep if inlist(astell, 3, 4, 12)
keep hhnr persnr ageburt asex
rename persnr pid_kid
rename asex sex_kid
rename ageburt age_kid

* age_kid holds ageburt here and is read as a two-digit birth year.
* Negative values are set to missing before the conversion (same treatment as
* the -1 code in the later waves); otherwise -1 would turn into an age of 85.
replace age_kid = . if age_kid<0
replace age_kid = 84-age_kid

***Merge children and HH (keep only children whose household has an HH record)
merge m:1 hhnr using "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/ahh.dta", keep(match) nogenerate

***Set age to age in 2013 (last survey)
* age_hhsur is deliberately not shifted: it stays at the 1984 value.
foreach var of varlist age_kid age_hh {
    replace `var' = `var'+29
}

save "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/amerged.dta", replace

erase "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/ahh.dta"


***********
*1985-2011*
***********

* Survey-year counter. The wave prefixes below cover 1985-2011 in order (27 prefixes,
* 27 years), so the counter starts at 1984 and is incremented at the top of each pass.
local svyyear = 1984
foreach x in b c d e f g h i j k l m n o p q r s t u v w x y z ba bb {
    local ++svyyear

    ***HH: records with stell==0 in this wave
    use "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Data/`x'pbrutto.dta", clear
    keep if `x'stell==0
    keep hhnr persnr `x'geburt `x'sex
    rename persnr pid_hh
    rename `x'sex sex_hh
    rename `x'geburt age_hh

    * age_hh holds the birth year at this point; -1 is a missing code.
    replace age_hh = . if age_hh==-1
    * age_hhsur = age in the survey year, matching the 1984 section where it is the
    * age in 1984. Previously it was copied from age_hh after the conversion to
    * age in 2013, so it never reflected the survey year for these waves.
    gen age_hhsur = `svyyear'-age_hh
    * age_hh = age in 2013 (last survey)
    replace age_hh = 2013-age_hh

    ***Reduce to one HH record per household
    * rank(), field gives rank 1 to the highest age_hh, so keeping the maximum rank
    * retains the record(s) with the lowest non-missing age_hh, i.e. the youngest.
    * NOTE(review): the previous comment said "keep oldest HH" - confirm which is intended.
    * Records with missing age_hh only survive when every record in the household
    * has missing age_hh (missing rank equals missing maximum).
    bysort hhnr: egen occ = rank(age_hh), field
    bysort hhnr: egen nocc = max(occ)
    keep if nocc==occ
    drop occ nocc
    * Ties are not dropped: one record is kept. Sorting on pid_hh within household
    * makes that choice reproducible instead of depending on the sort order of ties.
    bysort hhnr (pid_hh): keep if _n==1

    save "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'hh.dta", replace

    ***Children
    use "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Data/`x'pbrutto.dta", clear

    ***Keep all entering biological children
    * stell in 3, 4 or 12 and befstat between 2 and 4.
    * NOTE(review): confirm both code sets against the codebook.
    *keep if `x'stell==3
    *keep if inrange(`x'stell, 3, 4)
    keep if inlist(`x'stell, 3, 4, 12)
    keep if inrange(`x'befstat, 2, 4)
    keep hhnr persnr `x'geburt `x'sex
    rename persnr pid_kid
    rename `x'sex sex_kid
    rename `x'geburt age_kid
    * birth year to age in 2013; -1 is a missing code
    replace age_kid = . if age_kid==-1
    replace age_kid = 2013-age_kid

    ***Merge children and HH (keep only children whose household has an HH record)
    merge m:1 hhnr using "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'hh.dta", keep(match) nogenerate

    save "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'merged.dta", replace

    erase "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'hh.dta"
}
*

***********
*2012-2013*
***********

* Survey-year counter: bc is 2012 and bd is 2013 (per the section header), so the
* counter starts at 2011 and is incremented at the top of each pass.
local svyyear = 2011
foreach x in bc bd {
    local ++svyyear

    ***HH: records with stell==0 in this wave
    use "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Data/`x'pbrutto.dta", clear
    keep if `x'stell==0
    keep hhnr persnr `x'geburt `x'sex
    rename persnr pid_hh
    rename `x'sex sex_hh
    rename `x'geburt age_hh

    * age_hh holds the birth year at this point; -1 is a missing code.
    replace age_hh = . if age_hh==-1
    * age_hhsur = age in the survey year, matching the 1984 section where it is the
    * age in 1984. Previously it was copied from age_hh after the conversion to
    * age in 2013 (identical for 2013, one year off for 2012).
    gen age_hhsur = `svyyear'-age_hh
    * age_hh = age in 2013 (last survey)
    replace age_hh = 2013-age_hh

    ***Reduce to one HH record per household
    * rank(), field gives rank 1 to the highest age_hh, so keeping the maximum rank
    * retains the record(s) with the lowest non-missing age_hh, i.e. the youngest.
    * NOTE(review): the previous comment said "keep oldest HH" - confirm which is intended.
    * Records with missing age_hh only survive when every record in the household
    * has missing age_hh (missing rank equals missing maximum).
    bysort hhnr: egen occ = rank(age_hh), field
    bysort hhnr: egen nocc = max(occ)
    keep if nocc==occ
    drop occ nocc
    * Ties are not dropped: one record is kept. Sorting on pid_hh within household
    * makes that choice reproducible instead of depending on the sort order of ties.
    bysort hhnr (pid_hh): keep if _n==1

    save "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'hh.dta", replace

    ***Children
    use "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Data/`x'pbrutto.dta", clear

    ***Keep all entering biological children
    * stell between 21 and 24 and befstat between 2 and 4 (these two waves use a
    * different stell coding than the earlier loop).
    * NOTE(review): confirm both code sets against the codebook.
    *keep if `x'stell==21
    keep if inrange(`x'stell, 21, 24)
    keep if inrange(`x'befstat, 2, 4)
    keep hhnr persnr `x'geburt `x'sex
    rename persnr pid_kid
    rename `x'sex sex_kid
    rename `x'geburt age_kid
    * birth year to age in 2013; -1 is a missing code
    replace age_kid = . if age_kid==-1
    replace age_kid = 2013-age_kid

    ***Merge children and HH (keep only children whose household has an HH record)
    merge m:1 hhnr using "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'hh.dta", keep(match) nogenerate

    save "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'merged.dta", replace

    erase "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'hh.dta"
}
*

***Stack the per-wave child/HH files (1984 first, then every later wave)
use "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/amerged.dta", clear

foreach x in b c d e f g h i j k l m n o p q r s t u v w x y z ba bb bc bd {
    append using "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'merged.dta"
}

***Drop youngest parent among double entries
* For each child keep only the rows whose age_hh equals the child's maximum age_hh.
* If every row of a child has missing age_hh, all of them are kept.
bysort pid_kid: egen occ = max(age_hh)
drop if age_hh!=occ
drop occ

***Keep youngest parent when surveyed among duplicates
* Of the remaining rows keep those with the child's minimum age_hhsur.
bysort pid_kid: egen occ = min(age_hhsur)
keep if age_hhsur==occ
drop occ

***Drop remaining duplicates
* One row per child. The secondary sort keys list every other variable in the data,
* so the retained row is reproducible rather than dependent on the order of ties.
bysort pid_kid (hhnr pid_hh age_kid sex_kid sex_hh): keep if _n==1


***Remove the per-wave temporary files
foreach x in a b c d e f g h i j k l m n o p q r s t u v w x y z ba bb bc bd {
    erase "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/`x'merged.dta"
}
*
***Drop children who share household and age with another child
* nocc = number of children with the same hhnr and age_kid; it is left missing when
* hhnr is missing, so those rows are dropped as well. Every member of a same-age
* group is removed, not just the extra ones.
bysort hhnr age_kid: gen nocc = _N if hhnr!=.
tab nocc
keep if nocc==1
drop nocc

***Family size: number of child rows remaining in the household at this point
bysort hhnr: gen famsize = _N

***Sibling order within household
* rank(), field gives rank 1 to the highest age_kid, so 1 is the oldest child.
* Children with missing age get a missing rank and are dropped.
bysort hhnr: egen siblingorder = rank(age_kid), field
drop if missing(siblingorder)

* sibling2: 1 for the oldest child, 0 for ranks 2-15, missing above 15
gen sibling2 = (siblingorder==1) if inrange(siblingorder, 1, 15)

* sibling3: 1, 2, or 3 for rank three and higher (up to 15), missing above 15
gen sibling3 = min(siblingorder, 3) if inrange(siblingorder, 1, 15)

***Household-level filter on the parent of the oldest sibling
* parent_filter counts, per household, the children with sibling2==1 whose HH record
* has age_hhsur below 41. Rows with missing age_hhsur do not count (missing is not < 41).
bysort hhnr: egen parent_filter = total(sibling2==1 & age_hhsur<41)

save "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Temp/allmerged.dta", replace

****************************************
*MERGE TO POLITICAL OUTCOMES - CHILDREN*
****************************************
* The per-wave person files are matched on persnr, so pid_kid takes that name for
* the duration of the merges. Rows that exist only in a person file are discarded;
* every child row is kept whether or not it finds a match.
rename pid_kid persnr
local waves a b c d e f g h i j k l m n o p q r s t u v w x y z ba bb bc bd
foreach x of local waves {
    merge 1:1 persnr using "D:\Data\ProjData\SiblingOrder\OtherSamples\SOEP\Data/`x'p.dta", keep(master match) nogenerate
}
rename persnr pid_kid

***Turnout in 2009
* 1 if bap126==1, 0 if bap126==2, missing for any other value
gen turnout_kid = (bap126==1) if inlist(bap126, 1, 2)

***Average turnout intention 0-1 (higher values --> more inclined to vote)
* vp134 and zp125 are rescaled with (5 - value)/4, which maps 1 to 1 and 5 to 0.
* All negative values and the value 6 are set to missing beforehand. Previously only
* -1 was removed, so any other negative code (for example -2, which the other
* recodes in this file treat as missing) was rescaled to a value above 1.
foreach v of varlist vp134 zp125 {
    replace `v' = . if `v'<0 | `v'==6
    replace `v' = (5-`v')/4
}
* Row mean over whichever of the three measures are non-missing
egen turnoutint_kid = rowmean(vp134 zp125 turnout_kid)

* Diagnostics for the three components
sum vp134 zp125 turnout_kid
corr vp134 zp125 turnout_kid
alpha vp134 zp125 turnout_kid


***Average political interest 0-10 (higher values --> higher interest)
* One item per wave; values in [-2, -1] are set to missing before averaging.
local polint_items bp75 cp75 dp84 ep73 fp89 gp83 hp89 ip89 jp89 kp91 lp97 mp83 np93 op96 pp110 qp115 rp110 sp110 tp117 up122 vp128 wp118 xp127 yp129 zp122 bap127 bbp128 bcp124 bdp130
foreach v of local polint_items {
    replace `v' = . if inrange(`v', -2, -1)
}
* Row mean over the non-missing items, then reversed and stretched: a mean of 1
* becomes 10 and a mean of 4 becomes 0.
egen polintr_kid = rowmean(`polint_items')
replace polintr_kid = 10 - 10*(polintr_kid-1)/3


***Importance of being socially and politically active 0-10 (higher values --> more important)
* Values in [-2, -1] are set to missing before averaging.
local polimp_items gp0209 ip0809 lp0709 up0608 yp7808 bcp0308
foreach v of local polimp_items {
    replace `v' = . if inrange(`v', -2, -1)
}
* Row mean over the non-missing items, then reversed and stretched: a mean of 1
* becomes 10 and a mean of 4 becomes 0.
egen polimport_kid = rowmean(`polimp_items')
replace polimport_kid = 10 - 10*(polimport_kid-1)/3




***Drop households if household head not older than kid
* mocc counts, per household, the children for whom age_hh minus age_kid is below 1.
* A missing difference does not count. Households with at least one such child are
* dropped entirely.
bysort hhnr: egen mocc = total(age_hh-age_kid<1)
keep if mocc==0
drop mocc


***Save file for pooled analysis
* Sample: households passing parent_filter, children with sibling order 1 to 5
keep if parent_filter==1 & inrange(siblingorder, 1, 5)
keep hhnr turnoutint_kid siblingorder sibling2 age_kid sex_kid polintr_kid polimport_kid

* Harmonised variable names for the pooled file
rename (hhnr turnoutint_kid siblingorder sibling2 sex_kid age_kid polintr_kid polimport_kid) ///
       (famid turnout birthorder birthorder_dum female age polinterest civicduty)

* NOTE(review): subtracting 1 presumably turns a 1/2 sex code into a 0/1 female dummy - confirm coding.
replace female = female-1
replace age = age-4 //set age to age in 2009 when turnout is measured
gen sample = "SOEP"

* Mean of turnout over the estimation sample of the family fixed-effects regression,
* stored as a constant in turnout_average
quietly reghdfe turnout birthorder_dum female i.age, absorb(famid) cluster(famid)
summarize turnout if e(sample)
gen turnout_average = r(mean)

save "D:\Data\ProjData\SiblingOrder\ReplicationSOEP data 191128.dta", replace

